from __future__ import division
import random
import math
import os

# Input training set. The grouping below only merges ADJACENT lines that
# share a query term, so the file must already be sorted/grouped by term.
inputFileName = "/data3/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/TrainingSet20130609_sortedByQueryTerm"

# Debugging cap: the script stops (exit status 1) once this many terms have
# been reported. Set to None to report every term in the file.
MAX_TERMS_TO_REPORT = 3


def iter_term_postings(lines):
    """Yield ``(query_term, sorted_selected_indices)`` for each run of lines.

    Each non-blank line is split on single spaces; field 0 is parsed as the
    integer selected index and field 4 is taken as the query term.
    Consecutive lines with the same query term form one group.

    Args:
        lines: any iterable of text lines (an open file works and is
            consumed lazily, one line at a time).

    Yields:
        Tuples of the query term and the ascending list of its indices.
        Nothing is yielded for empty input, and the final group is yielded
        when the input is exhausted.

    Raises:
        IndexError: a non-blank line has fewer than five fields.
        ValueError: field 0 of a line is not an integer.
    """
    # None (rather than "") marks "no group started yet", so no empty
    # placeholder group is ever emitted for the first line.
    current_term = None
    selected = []
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = stripped.split(" ")
        term = fields[4]
        index = int(fields[0])
        if term != current_term:
            if current_term is not None:
                selected.sort()
                yield current_term, selected
            current_term = term
            selected = []
        selected.append(index)
    # Flush the last group: no following term exists to trigger it.
    if current_term is not None:
        selected.sort()
        yield current_term, selected


def main():
    """Print the sorted selected indices of each query term in the input."""
    print("Program Begins...")
    numOfTermsDealWith = 0
    # The context manager closes the file even when the cap below exits.
    with open(inputFileName, "r") as inputFileHandler:
        for term, postings in iter_term_postings(inputFileHandler):
            print("Overall Statistics:")
            # Single-argument print keeps the output text identical whether
            # this runs as a Python 2 print statement or a Python 3 call.
            print("Term: %s %d %s" % (term, len(postings), postings))
            numOfTermsDealWith += 1
            if numOfTermsDealWith == MAX_TERMS_TO_REPORT:
                # Same exit status as before; SystemExit does not depend on
                # the `exit` helper injected by the site module.
                raise SystemExit(1)
    print("Program Ends.")


if __name__ == "__main__":
    main()